#############################################################################################################
#############################################################################################################
####Non-state Atrocities in Capital Cities - ZINB Global Sample Analysis: Predicted Probabilities Model 4####
#############################################################################################################
#############################################################################################################
###Note that these predicted values are calculated on cells that can experience events. An alternative model coefficients for both
###stages are used produce practically identical results, but this one is preferred from a theoretical perspective

library(MASS) 
library(pscl)
library(foreign)
library(stargazer)
library(mvtnorm)
library(plyr)
library(robust)

# Set working library
setwd("~/Data/Global Analysis/")
# Read in main data
main.data <- read.dta("full.grid.upd.dta")
###Run Model 4 (silent)
at.zinb.4 <- zeroinfl(incidentnonstatefull ~ lag_Capital + lagcivconflagtemp + lagincidentsumfull + lagurban + loglagross_oil_prod
                      + loglagppp + loglagbdist1 + lagp_polity2 + loglagpop + loglagttime + loglagcellarea + year96 + year97+
                        year98 + year99 + year00 + year01 + year02 + year03 + year04 + year05 + year06 + year07 + year08
                      |lagcivconflagtemp + lagincidentsumfull + lagurban + loglagross_oil_prod
                      + loglagppp + loglagbdist1 + lagp_polity2 + loglagpop + loglagttime + loglagcellarea, 
                      dist = "negbin", data = main.data)

#####################
###Capital City FD###
#####################
###create fake data
non.miss.data.oil<-na.omit(main.data[,c("incidentnonstatefull", "lag_Capital", "loglagttime", "lagcivconflagtemp","loglagross_oil_prod", "loglagppp", "lagincidentsumfull", "lagurban", "loglagbdist1", "gid", "loglagcellarea","lagp_polity2", "loglagpop", "year95", "year96","year97","year98","year99","year00","year01","year02","year03","year04","year05","year06","year07","year08","year09", "year")])
lagcivconflagtemp <- as.numeric(median(non.miss.data.oil$lagcivconflagtemp))
lagincidentsumfull <- as.numeric(median(non.miss.data.oil$lagincidentsumfull))
lagurban <- as.numeric(mean(non.miss.data.oil$lagurban))
lagross_oil_prod <- as.numeric(mean(non.miss.data.oil$loglagross_oil_prod))
loglagcapdist <- as.numeric(mean(non.miss.data.oil$lagcapdist))
lagppp <- as.numeric(mean(non.miss.data.oil$loglagppp))
lagbdist1 <- as.numeric(mean(non.miss.data.oil$loglagbdist1))
lagp_polity2 <- as.numeric(median(non.miss.data.oil$lagp_polity2))
loglagpop <- as.numeric(mean(non.miss.data.oil$loglagpop))
loglagttime <- as.numeric(mean(non.miss.data.oil$loglagttime))
lagcellarea <- as.numeric(mean(non.miss.data.oil$loglagcellarea))
year96 <- as.numeric(median(non.miss.data.oil$year96))
year97 <- as.numeric(median(non.miss.data.oil$year97))
year98 <- as.numeric(median(non.miss.data.oil$year98))
year99 <- as.numeric(median(non.miss.data.oil$year99))
year00 <- as.numeric(median(non.miss.data.oil$year00))
year01 <- as.numeric(median(non.miss.data.oil$year01))
year02 <- as.numeric(median(non.miss.data.oil$year02))
year03 <- as.numeric(median(non.miss.data.oil$year03))
year04 <- as.numeric(median(non.miss.data.oil$year04))
year05 <- as.numeric(median(non.miss.data.oil$year05))
year06 <- as.numeric(median(non.miss.data.oil$year06))
year07 <- as.numeric(median(non.miss.data.oil$year07))
year08 <- as.numeric(median(non.miss.data.oil$year08))
###Set seed
set.seed(2)
#set sims  (number of simulations)
sims <- 1000
#create some storage matrices
p1<-NULL
p2<-NULL
all.expectedvalues1<-NULL
all.expectedvalues2<-NULL
First.Differences<-NULL
First.Diff.Per <- NULL
all.first.diff.per <- NULL
all.firstdifferences<-NULL
history<-matrix(0,nrow=1,ncol=3)
hist.per <- matrix(0,nrow=1,ncol=3)
#Set coefficients
c <- c(at.zinb.4$coeff$count, at.zinb.4$coeff$zero)
#start loop
for(j in 1:sims){ 
  #predicted#
  bsim<-rmvnorm(1, mean=c, sigma=at.zinb.4$vcov)
  bsimc<-bsim[,1:25]
  xoutcome.1<-rbind(1,0,lagcivconflagtemp, lagincidentsumfull, lagurban, lagross_oil_prod, lagppp, 
                    lagbdist1, lagp_polity2, loglagpop, loglagttime, lagcellarea, year96,year97,year98,year99,year00,year01,year02, 
                    year03, year04,year05,
                    year06,year07,year08)
  ####Creating the matrices
  equationcount<-bsimc %*% xoutcome.1
  expectedvalue1<-exp(equationcount) 
  ###First difference change prediction
  xoutcome.2<-rbind(1,1,lagcivconflagtemp, lagincidentsumfull, lagurban, lagross_oil_prod, lagppp, 
                    lagbdist1, lagp_polity2, loglagpop, loglagttime, lagcellarea, year96,year97,year98,year99,year00,year01,year02, 
                    year03, year04,year05,
                    year06,year07,year08)
  ###Creating the matrices
  equationcount2<-bsimc %*% xoutcome.2
  expectedvalue2<-exp(equationcount2)
  Firstdifference<-expectedvalue2-expectedvalue1       
  First.Diff.Per <- ((expectedvalue2-expectedvalue1)/expectedvalue1)*100
  all.firstdifferences<-c(all.firstdifferences, Firstdifference)
  all.first.diff.per<- c(all.first.diff.per, First.Diff.Per)
  ###end loop
}
###Create summary
sorted.firstdifferences<-sort(all.firstdifferences)
history[1,1]<-mean(sorted.firstdifferences)
history[1,2]<-sorted.firstdifferences[round(0.95*length(sorted.firstdifferences))]
history[1,3]<-sorted.firstdifferences[round(0.05*length(sorted.firstdifferences))]
###Percent summary
sorted.first.diff.per<-sort(all.first.diff.per)
hist.per[1,1]<- mean(sorted.first.diff.per)
hist.per[1,2] <- sorted.first.diff.per[round(0.95*length(sorted.first.diff.per))]
hist.per[1,3] <- sorted.first.diff.per[round(0.05*length(sorted.first.diff.per))]
###end final loop
#}
history
hist.per

####################
##Urban Areas FD####
####################
#Calculate quantiles for variable range
summary(non.miss.data.oil$lagurban)
quantile(non.miss.data.oil$lagurban, probs = 0.1)
quantile(non.miss.data.oil$lagurban, probs = 0.9)
#Create a constant variable for capital
lagcapital<- as.numeric(median(non.miss.data.oil$lag_Capital))
set.seed(2)
#set sims 
sims <- 1000
#create some storage matrices
p1<-NULL
p2<-NULL
all.expectedvalues1<-NULL
all.expectedvalues2<-NULL
First.Differences<-NULL
First.Diff.Per <- NULL
all.first.diff.per <- NULL
all.firstdifferences<-NULL
history<-matrix(0,nrow=1,ncol=3)
hist.per <- matrix(0,nrow=1,ncol=3)
#values to sequence X across
for(j in 1:sims){ 
  #predicted#
  bsim<-rmvnorm(1, mean=c, sigma=at.zinb.4$vcov)
  bsimc<-bsim[,1:25]
  xoutcome.1<-rbind(1,lagcapital,lagcivconflagtemp, lagincidentsumfull, 0, lagross_oil_prod, lagppp, 
                    lagbdist1, lagp_polity2, loglagpop, loglagttime, lagcellarea, year96,year97,year98,year99,year00,year01,year02, 
                    year03, year04,year05,
                    year06,year07,year08)
  ####Creating the matrices
  equationcount<-bsimc %*% xoutcome.1
  expectedvalue1<-exp(equationcount)
  ###First difference change prediction
  xoutcome.2<-rbind(1,lagcapital,lagcivconflagtemp, lagincidentsumfull, 0.207, lagross_oil_prod, lagppp, 
                    lagbdist1, lagp_polity2, loglagpop, loglagttime, lagcellarea, year96,year97,year98,year99,year00,year01,year02, 
                    year03, year04,year05,
                    year06,year07,year08)
  ###Creating the matrices
  equationcount2<-bsimc %*% xoutcome.2
  expectedvalue2<-exp(equationcount2)
  Firstdifference<-expectedvalue2-expectedvalue1       
  First.Diff.Per <- ((expectedvalue2-expectedvalue1)/expectedvalue1)*100
  all.firstdifferences<-c(all.firstdifferences, Firstdifference)
  all.first.diff.per<- c(all.first.diff.per, First.Diff.Per)
  ###end loop
}
###Create summary
sorted.firstdifferences<-sort(all.firstdifferences)
history[1,1]<-mean(sorted.firstdifferences)
history[1,2]<-sorted.firstdifferences[round(0.95*length(sorted.firstdifferences))]
history[1,3]<-sorted.firstdifferences[round(0.05*length(sorted.firstdifferences))]
###Percent summary
sorted.first.diff.per<-sort(all.first.diff.per)
hist.per[1,1]<- mean(sorted.first.diff.per)
hist.per[1,2] <- sorted.first.diff.per[round(0.95*length(sorted.first.diff.per))]
hist.per[1,3] <- sorted.first.diff.per[round(0.05*length(sorted.first.diff.per))]
###end final loop
#}
history
hist.per

#######################
###Civil Conflict FD###
#######################
set.seed(2)
#set sims 
sims <- 1000
#create some storage matrices
p1<-NULL
p2<-NULL
all.expectedvalues1<-NULL
all.expectedvalues2<-NULL
First.Differences<-NULL
First.Diff.Per <- NULL
all.first.diff.per <- NULL
all.firstdifferences<-NULL
history<-matrix(0,nrow=1,ncol=3)
hist.per <- matrix(0,nrow=1,ncol=3)
#values to sequence X across
for(j in 1:sims){ 
  #predicted#
  bsim<-rmvnorm(1, mean=c, sigma=at.zinb.4$vcov) #
  bsimc<-bsim[,1:25]
  xoutcome.1<-rbind(1,lagcapital,0, lagincidentsumfull, lagurban, lagross_oil_prod, lagppp, 
                    lagbdist1, lagp_polity2, loglagpop, loglagttime, lagcellarea, year96,year97,year98,year99,year00,year01,year02, 
                    year03, year04,year05,
                    year06,year07,year08)
  ####Creating the matrices
  equationcount<-bsimc %*% xoutcome.1
  expectedvalue1<-exp(equationcount)
  ###First difference change prediction
  xoutcome.2<-rbind(1,lagcapital,1, lagincidentsumfull, lagurban, lagross_oil_prod, lagppp, 
                    lagbdist1, lagp_polity2, loglagpop, loglagttime, lagcellarea, year96,year97,year98,year99,year00,year01,year02, 
                    year03, year04,year05,
                    year06,year07,year08)
  ###Creating the matrices
  equationcount2<-bsimc %*% xoutcome.2
  expectedvalue2<-exp(equationcount2)
  Firstdifference<-expectedvalue2-expectedvalue1       
  First.Diff.Per <- ((expectedvalue2-expectedvalue1)/expectedvalue1)*100
  all.firstdifferences<-c(all.firstdifferences, Firstdifference)
  all.first.diff.per<- c(all.first.diff.per, First.Diff.Per)
  ###end loop
}
###Create summary
sorted.firstdifferences<-sort(all.firstdifferences)
history[1,1]<-mean(sorted.firstdifferences)
history[1,2]<-sorted.firstdifferences[round(0.95*length(sorted.firstdifferences))]
history[1,3]<-sorted.firstdifferences[round(0.05*length(sorted.firstdifferences))]
###Percent summary
sorted.first.diff.per<-sort(all.first.diff.per)
hist.per[1,1]<- mean(sorted.first.diff.per)
hist.per[1,2] <- sorted.first.diff.per[round(0.95*length(sorted.first.diff.per))]
hist.per[1,3] <- sorted.first.diff.per[round(0.05*length(sorted.first.diff.per))]
###end final loop
#}
history
hist.per

############
###GCP FD###
############
quantile(non.miss.data.oil$loglagppp, probs = 0.1)
quantile(non.miss.data.oil$loglagppp, probs = 0.9)
set.seed(2)
#set sims 
sims <- 1000
#create some storage matrices
p1<-NULL
p2<-NULL
all.expectedvalues1<-NULL
all.expectedvalues2<-NULL
First.Differences<-NULL
First.Diff.Per <- NULL
all.first.diff.per <- NULL
all.firstdifferences<-NULL
history<-matrix(0,nrow=1,ncol=3)
hist.per <- matrix(0,nrow=1,ncol=3)
for(j in 1:sims){ 
  #predicted#
  bsim<-rmvnorm(1, mean=c, sigma=at.zinb.4$vcov)
  bsimc<-bsim[,1:25]
  xoutcome.1<-rbind(1,lagcapital,lagcivconflagtemp, lagincidentsumfull, lagurban, lagross_oil_prod, quantile(non.miss.data.oil$loglagppp, probs = 0.1), 
                    lagbdist1, lagp_polity2, loglagpop, loglagttime, lagcellarea, year96,year97,year98,year99,year00,year01,year02, 
                    year03, year04,year05,
                    year06,year07,year08)
  ####Creating the matrices
  equationcount<-bsimc %*% xoutcome.1
  expectedvalue1<-exp(equationcount)
  ###First difference change prediction
  xoutcome.2<-rbind(1,lagcapital,lagcivconflagtemp, lagincidentsumfull, lagurban, lagross_oil_prod, quantile(non.miss.data.oil$loglagppp, probs = 0.9), 
                    lagbdist1, lagp_polity2, loglagpop, loglagttime, lagcellarea, year96,year97,year98,year99,year00,year01,year02, 
                    year03, year04,year05,
                    year06,year07,year08)
  ###Creating the matrices
  equationcount2<-bsimc %*% xoutcome.2
  expectedvalue2<-exp(equationcount2)
  Firstdifference<-expectedvalue2-expectedvalue1       
  First.Diff.Per <- ((expectedvalue2-expectedvalue1)/expectedvalue1)*100
  all.firstdifferences<-c(all.firstdifferences, Firstdifference)
  all.first.diff.per<- c(all.first.diff.per, First.Diff.Per)
  ###end loop
}
###Create summary
sorted.firstdifferences<-sort(all.firstdifferences)
history[1,1]<-mean(sorted.firstdifferences)
history[1,2]<-sorted.firstdifferences[round(0.95*length(sorted.firstdifferences))]
history[1,3]<-sorted.firstdifferences[round(0.05*length(sorted.firstdifferences))]
###Percent summary
sorted.first.diff.per<-sort(all.first.diff.per)
hist.per[1,1]<- mean(sorted.first.diff.per)
hist.per[1,2] <- sorted.first.diff.per[round(0.95*length(sorted.first.diff.per))]
hist.per[1,3] <- sorted.first.diff.per[round(0.05*length(sorted.first.diff.per))]
###end final loop
#}
history
hist.per

############
###Oil FD###
############
quantile(non.miss.data.oil$loglagross_oil_prod, probs = 0.1)
quantile(non.miss.data.oil$loglagross_oil_prod, probs = 0.9)
set.seed(2)
#set sims 
sims <- 1000
#create some storage matrices
p1<-NULL
p2<-NULL
all.expectedvalues1<-NULL
all.expectedvalues2<-NULL
First.Differences<-NULL
First.Diff.Per <- NULL
all.first.diff.per <- NULL
all.firstdifferences<-NULL
history<-matrix(0,nrow=1,ncol=3)
hist.per <- matrix(0,nrow=1,ncol=3)
for(j in 1:sims){ 
  #predicted#
  bsim<-rmvnorm(1, mean=c, sigma=at.zinb.4$vcov) 
  bsimc<-bsim[,1:25]
  xoutcome.1<-rbind(1,lagcapital,lagcivconflagtemp, lagincidentsumfull, lagurban, quantile(non.miss.data.oil$loglagross_oil_prod, probs = 0.1), lagppp, 
                    lagbdist1, lagp_polity2, loglagpop, loglagttime, lagcellarea, year96,year97,year98,year99,year00,year01,year02, 
                    year03, year04,year05,
                    year06,year07,year08)
  ####Creating the matrices
  equationcount<-bsimc %*% xoutcome.1
  expectedvalue1<-exp(equationcount)
  ###First difference change prediction
  xoutcome.2<-rbind(1,lagcapital,lagcivconflagtemp, lagincidentsumfull, lagurban, quantile(non.miss.data.oil$loglagross_oil_prod, probs = 0.9), lagppp, 
                    lagbdist1, lagp_polity2, loglagpop, loglagttime, lagcellarea, year96,year97,year98,year99,year00,year01,year02, 
                    year03, year04,year05,
                    year06,year07,year08)
  ###Creating the matrices
  equationcount2<-bsimc %*% xoutcome.2
  expectedvalue2<-exp(equationcount2)
  Firstdifference<-expectedvalue2-expectedvalue1       
  First.Diff.Per <- ((expectedvalue2-expectedvalue1)/expectedvalue1)*100
  all.firstdifferences<-c(all.firstdifferences, Firstdifference)
  all.first.diff.per<- c(all.first.diff.per, First.Diff.Per)
  ###end loop
}
###Create summary
sorted.firstdifferences<-sort(all.firstdifferences)
history[1,1]<-mean(sorted.firstdifferences)
history[1,2]<-sorted.firstdifferences[round(0.95*length(sorted.firstdifferences))]
history[1,3]<-sorted.firstdifferences[round(0.05*length(sorted.firstdifferences))]
###Percent summary
sorted.first.diff.per<-sort(all.first.diff.per)
hist.per[1,1]<- mean(sorted.first.diff.per)
hist.per[1,2] <- sorted.first.diff.per[round(0.95*length(sorted.first.diff.per))]
hist.per[1,3] <- sorted.first.diff.per[round(0.05*length(sorted.first.diff.per))]
###end final loop
#}
history
hist.per